	title	lzcomp - file compressor using limpel-ziv algorithm

;Tom Pfau
;Digital Equipment Corporation
;Parsippany, NJ

;Constants
clear		equ	256		;Clear code
eof		equ	257		;End of file marker
first_free	equ	258		;First free code
maxmax		equ	4096		;Max code + 1

	include macros.mlb

;Hash table entry
hash_rec	struc
first	dw	?			; First entry with this value
next	dw	?			; Next entry along chain
char	db	?			; Suffix char
hash_rec	ends

;Declare Segments
code	segment byte public 'code'
code	ends
stack	segment word stack 'stack'
	dw	128 dup (?)
stack	ends
data	segment word public 'data'
data	ends
memory	segment para public 'memory'
hash	label	hash_rec
memory	ends

;Start writing code
code	segment
	assume	cs:code,ds:data,es:data,ss:stack

start	proc	far
	mov	bx,seg hash		;End of program
	mov	ax,ds			;Start of program
	sub	bx,ax			;Program size
	inc	bx			;Make sure
	setmem	bx			;Set our size
	mov	bx,data			;Set up data segment addressability
	mov	es,bx
	mov	ds,bx
	print	input_prompt		;Get input file name
	input	input_file
	print	crlf
	print	output_prompt		;And output
	input	output_file
	print	crlf
	mov	al,input_file+1		;Terminate file names with nulls
	xor	ah,ah
	mov	si,ax
	mov	input_file+2[si],0
	mov	al,output_file+1
	mov	si,ax
	mov	output_file+2[si],0
	hopen	input_file+2,0
	mov	input_handle,ax
	hcreat	output_file+2,0
	mov	output_handle,ax
	call	compress		;Compress file
	hclose	input_handle		;Close in and out
	hclose	output_handle
	exit				;Done
start	endp

data	segment
input_prompt	db	'Input file: $'
output_prompt	db	'Output file: $'
input_file	db	80,0,80 dup (?)
output_file	db	80,0,80 dup (?)
crlf		db	13,10,'$'
input_handle	dw	?
output_handle	dw	?
data	ends

compress	proc	near
	malloc	1280			;Allocate space for hash table
	mov	hash_seg,ax		;Save segment address
l1:	call	init_table		;Initialize the table and some vars
	mov	ax,clear		;Write a clear code
	call	write_code
	call	read_char		;Read first char
l4:	xor	ah,ah			;Turn char into code
l4a:	mov	prefix_code,ax		;Set prefix code
	call	read_char		;Read next char
	jc	l17			;Carry means eof
	mov	k,al			;Save char in k
	mov	bx,prefix_code		;Get prefix code
	call	lookup_code		;See if this pair in table
	jnc	l4a			;nc means yes, new code in ax
	call	add_code		;Add pair to table
	push	bx			;Save new code
	mov	ax,prefix_code		;Write old prefix code
	call	write_code
	pop	bx
	mov	al,k			;Get last char
	cmp	bx,max_code		;Exceed code size?
	jl	l4			;less means no
	cmp	nbits,12		;Currently less than 12 bits?
	jl	l14			;yes
	mov	ax,clear		;Write a clear code
	call	write_code
	call	init_table		;Reinit table
	mov	al,k			;get last char
	jmp	l4			;Start over
l14:	inc	nbits			;Increase number of bits
	shl	max_code,1		;Double max code size
	jmp	l4			;Get next char
l17:	mov	ax,prefix_code		;Write last code
	call	write_code
	mov	ax,eof			;Write eof code
	call	write_code
	mov	ax,bit_offset		;Make sure buffer is flushed to file
	cmp	ax,0
	je	l18
	mov	cx,8			;convert bits to bytes
	xor	dx,dx
	div	cx
	or	dx,dx			;If extra bits, make sure they get
	je	l17a			;written
	inc	ax
l17a:	call	flush
l18:	ret
compress	endp

data	segment
hash_seg	dw	?
prefix_code	dw	?
free_code	dw	?
max_code	dw	?
nbits		dw	?
k		db	?
data	ends

init_table	proc	near
	mov	nbits,9			;Set code size to 9
	mov	max_code,512		;Set max code to 512
	push	es			;Save seg reg
	mov	es,hash_seg		;Address hash table
	mov	ax,-1			;Unused flag
	mov	cx,640			;Clear first 256 entries
	mov	di,offset hash		;Point to first entry
rep	stosw				;Clear it out
	pop	es			;Restore seg reg
	mov	free_code,first_free	;Set next code to use
	ret				;done
init_table	endp

write_code	proc	near
	push	ax			;Save code
	mov	ax,bit_offset		;Get bit offset
	mov	cx,nbits		;Adjust bit offset by code size
	add	bit_offset,cx
	mov	cx,8			;Convert bit offset to byte offset
	xor	dx,dx
	div	cx
	cmp	ax,1020			;Approaching end of buffer?
	jl	wc1			;less means no
	call	flush			;Write the buffer
	push	dx			;dx contains offset within byte
	add	dx,nbits		;adjust by code size
	mov	bit_offset,dx		;new bit offset
	pop	dx			;restore dx
	add	ax,offset output_data	;Point to last byte
	mov	si,ax			;put in si
	mov	al,byte ptr [si]	;move byte to first position
	mov	output_data,al
	xor	ax,ax			;Byte offset of zero
wc1:	add	ax,offset output_data	;Point into buffer
	mov	di,ax			;Destination
	pop	ax			;Restore code
	mov	cx,dx			;offset within byte
	xor	dx,dx			;dx will catch bits rotated out
	jcxz	wc3			;If offset in byte is zero, skip shift
wc2:	shl	ax,1			;Rotate code
	rcl	dx,1
	loop	wc2
	or	al,byte ptr [di]	;Grab bits currently in buffer
wc3:	stosw				;Save data
	mov	al,dl			;Grab extra bits
	stosb				;and save
	ret
write_code	endp

data	segment
bit_offset	dw	?
output_data	db	1024 dup (?)
data	ends

flush		proc	near
	push	ax			;Save all registers
	push	bx			;AX contains number of bytes to write
	push	cx
	push	dx
	hwrite	output_handle,output_data,ax	;write output file
	pop	dx
	pop	cx
	pop	bx
	pop	ax
	ret
flush		endp

read_char	proc	near
	mov	di,input_offset		;Anything left in buffer?
	cmp	di,input_size
	jl	rd1			;less means yes
	hread	input_handle,input_data,1024	;Read next chunk of input
	cmp	ax,0			;Anything left?
	je	rd2			;equal means no, finished
	mov	input_size,ax		;Save bytes read
	mov	input_offset,0		;Point to beginning of buffer
	mov	di,0
rd1:	lea	si,input_data[di]	;Point at character
	lodsb				;Read it in
	inc	input_offset		;Adjust pointer
	clc				;Success
	ret
rd2:	stc				;Nothing left
	ret
read_char	endp

data	segment
input_data	db	1024 dup (?)
input_offset	dw	0
input_size	dw	0
data	ends

lookup_code	proc	near
	push	ds			;Save seg reg
	mov	ds,hash_seg		;point to hash table
	call	index			;convert code to address
	mov	di,0			;flag
	cmp	[si].first,-1		;Has this code been used?
	je	gc4			;equal means no
	inc	di			;set flag
	mov	bx,[si].first		;Get first entry
gc2:	call	index			;convert code to address
	cmp	[si].char,al		;is char the same?
	jne	gc3			;ne means no
	clc				;success
	mov	ax,bx			;put found code in ax
	pop	ds			;restore seg reg
	ret				;done
gc3:	cmp	[si].next,-1		;More left with this prefix?
	je	gc4			;equal means no
	mov	bx,[si].next		;get next code
	jmp	gc2			;try again
gc4:	stc				;not found
	pop	ds			;restore seg reg
	ret				;done
lookup_code	endp

index		proc	near
	mov	si,bx			;si = bx * 5 (5 byte hash entries)
	shl	si,1			;si = bx * 2 * 2 + bx
	shl	si,1
	add	si,bx
	ret
index		endp

add_code	proc	near
	mov	bx,free_code		;Get code to use
	push	ds			;point to hash table
	mov	ds,hash_seg
	cmp	di,0			;First use of this prefix?
	je	ac1			;equal means yes
	mov	[si].next,bx		;point last use to new entry
	jmp	short ac2
ac1:	mov	[si].first,bx		;Point first use to new entry
ac2:	cmp	bx,maxmax		;Have we reached code limit?
	je	ac3			;equal means yes, just return
	call	index			;get address of new entry
	mov	[si].first,-1		;initialize pointers
	mov	[si].next,-1
	mov	[si].char,al		;save suffix char
	inc	es:free_code		;adjust next code
ac3:	pop	ds			;restore seg reg
	ret
add_code	endp

code	ends

	end	start
